*************************************************************************************************
/*																								
		Purpose: Bring in 1988-2010 GSS and assign income scores				
		Creates: GSS_88to10_analysis.dta										
		Note: Will only keep '88-'10 cross sections because one of two father   
		occupation variables cleaned in this file (paocc80) is only available   
		from '88 to '10.
*/				                                        																								*
*************************************************************************************************

* Start from an empty session; never pause output; allow up to 10,000 variables
clear all
set more off
set maxvar 10000

* Relative paths below are resolved from the GSS data-source folder
cd "$Mydirectory1/1_DataSources/GSS/"

* Load the GSS file (downloaded from the GSS website), ordered by survey year and respondent id
use "./input/GSS7218_R1.dta", clear
sort year id

* Retain only the 1988-2010 cross sections
keep if inrange(year, 1988, 2010)

* Run the CPI do-file; preserve/restore puts the GSS data back in memory afterwards
preserve
quietly run "../CPI/CPI_deflator.do"
restore

*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*

*********************
** OCCUPATION CODES USING THE 1980 CODES
*********************

cd "$Mydirectory1/1_DataSources/GSS/"

/* Convert the 1980 census occupation codes of the respondent (occ80), spouse
   (spocc80), mother (maocc80) and father (paocc80) into the ~30 ANES occupations.

   Each pass of the loop:
     1. temporarily renames the source variable to census1980, the merge key
        of the crosswalk;
     2. merges the crosswalk, which adds the coarsened code as fatheroccej
        (the name is the same no matter whose occupation is being converted);
     3. recodes self-employed managers from 28 to 21;
     4. stores the coarsened code under its final name and restores the
        original name of the source variable.
 */
ren *, lower

foreach var of varlist occ80 spocc80 maocc80 paocc80  {

	* Self-employment flag and final variable name that go with this source variable.
	* Keeping the mapping here makes the loop independent of the varlist order.
	if "`var'"=="occ80" {
		local selfemp wrkslf
		local coarse  occR_80
	}
	if "`var'"=="spocc80" {
		local selfemp spwrkslf
		local coarse  occSP_80
	}
	if "`var'"=="maocc80" {
		local selfemp mawrkslf
		local coarse  motheroccej_80
	}
	if "`var'"=="paocc80" {
		local selfemp pawrkslf
		local coarse  fatheroccej_80
	}

	rename `var' census1980 
	replace census1980=. if census1980==.i | census1980==.n | census1980==.d  

	* Crosswalk the 1980 occupations to the ANES occs (merge adds one variable: fatheroccej)
	merge m:1 census1980 using "../Crosswalks/Crosswalk_1980Census_toANES.dta"
	tab census1980 if _merge==1
	* Every unmatched master record must be a missing occupation code
	assert census1980==. if _merge==1
	drop if _merge==2
	
	* Spelled out in full (not abbreviated) so this works with varabbrev off
	count if _merge==1 & fatheroccej==.
	drop _merge

	* Managers are coded 28 by the crosswalk; recode to 21 if they are self-employed
	replace fatheroccej=21 if fatheroccej==28 & `selfemp'==1

	rename fatheroccej `coarse'
	rename census1980 `var'
}

	* Full variable names are used here; abbreviations such as occSP or occR
	* break under varabbrev off and become ambiguous once the _10 versions exist.
	label var fatheroccej_80 "Father's occupation, coarsened"
	label var occSP_80 "Spouse occupation, coarsened"
	label var occR_80 "Resp. occupation, coarsened"
	label var motheroccej_80 "Mother's occupation, coarsened"

	
*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*
	
*********************
** OCCUPATIONS USING 2010 CODES 
*********************

/* Map the 2010 census occupation codes of the respondent (occ10), spouse
   (spocc10) and father (paocc10) into the ~30 ANES occupations with a
   GSS-provided crosswalk. The crosswalk is keyed on census2010 and supplies
   the coarsened code as fatheroccej_2010 for whichever person is processed.
*/

foreach var of varlist occ10 spocc10 paocc10   {

	* Self-employment flag and destination name for this source variable
	if "`var'"=="occ10" {
		local selfemp wrkslf
		local coarse  occR_10
	}
	if "`var'"=="spocc10" {
		local selfemp spwrkslf
		local coarse  occSP_10
	}
	if "`var'"=="paocc10" {
		local selfemp pawrkslf
		local coarse  fatheroccej10
	}

	* Borrow the name of the merge key; blank out missing codes and code 9997
	rename `var' census2010 
	replace census2010=. if census2010==.i | census2010==.n | census2010==.d  | census2010==9997

	* Attach the ANES occupation (merge adds fatheroccej_2010)
	merge m:1 census2010 using "../Crosswalks/Crosswalk_2010Census_toANES.dta"
	tab census2010 if _merge==1, nol
	* Only missing occupation codes may fail to match
	assert census2010==. if _merge==1
	drop if _merge==2

	count if _merge==1 & fatheroccej_2010==.
	drop _merge

	* Self-employed managers: crosswalk code 28 becomes 21
	replace fatheroccej_2010=21 if fatheroccej_2010==28 & `selfemp'==1

	* Store the result under its final name and give the source variable its name back
	rename fatheroccej_2010 `coarse'
	rename census2010 `var'
}

	label var fatheroccej10 "Father's occupation, coarsened"
	label var occSP_10 "Spouse occupation, coarsened"
	label var occR_10 "Resp. occupation, coarsened"


*------------------------------------------------------------------------------*	
*------------------------------------------------------------------------------*

*********************
** TOTAL FAMILY INCOME 
*********************

/* Each survey era has its own categorical income variable. Adjacent categories
   are pooled into bins and given the bin midpoint in dollars; the lowest bin
   gets 0.75 x its upper bound and the open-ended top bin gets 1.25 x its lower
   bound. Values outside the era of each variable are reset to missing.
*/

* income86 -> dollars (surveys through 1990)
	gen income86_bins=.
	replace income86_bins=0.75*4000  if inrange(income86, 1, 3)   //<4k
	replace income86_bins=5000       if inlist(income86, 4, 5)    //4-6k
	replace income86_bins=7000       if inlist(income86, 6, 7)    //6-8k
	replace income86_bins=10250      if inlist(income86, 8, 9)    //8-12.5k
	replace income86_bins=15000      if inlist(income86, 10, 11)  //12.5-17.5k
	replace income86_bins=20000      if inlist(income86, 12, 13)  //17.5-22.5k
	replace income86_bins=26250      if inlist(income86, 14, 15)  //22.5-30k
	replace income86_bins=35000      if inlist(income86, 16, 17)  //30-40k
	replace income86_bins=50000      if inlist(income86, 18, 19)  //40-60k
	replace income86_bins=1.25*60000 if income86==20              //60k+
	replace income86_bins=. if year>1990 

* income91 -> dollars (1991-1996 surveys)
	gen income91_bins=.
	replace income91_bins=0.75*5000  if inrange(income91, 1, 4)   //<5k
	replace income91_bins=6000       if inlist(income91, 5, 6)    //5-7k
	replace income91_bins=8500       if inlist(income91, 7, 8)    //7-10k
	replace income91_bins=12500      if inlist(income91, 9, 10)   //10-15k
	replace income91_bins=17500      if inlist(income91, 11, 12)  //15-20k
	replace income91_bins=22500      if inlist(income91, 13, 14)  //20-25k
	replace income91_bins=30000      if inlist(income91, 15, 16)  //25-35k
	replace income91_bins=42500      if inlist(income91, 17, 18)  //35-50k
	replace income91_bins=62500      if inlist(income91, 19, 20)  //50-75k
	replace income91_bins=1.25*75000 if income91==21              //75k+
	replace income91_bins=. if !inrange(year, 1991, 1996)

* income98 -> dollars (1998-2004 surveys)
	gen income98_bins=.
	replace income98_bins=0.75*6000   if inrange(income98, 1, 5)   //<6k
	replace income98_bins=8000        if inrange(income98, 6, 8)   //6-10k
	replace income98_bins=12500       if inlist(income98, 9, 10)   //10-15k
	replace income98_bins=17500       if inlist(income98, 11, 12)  //15-20k
	replace income98_bins=22500       if inlist(income98, 13, 14)  //20-25k
	replace income98_bins=30000       if inlist(income98, 15, 16)  //25-35k
	replace income98_bins=42500       if inlist(income98, 17, 18)  //35-50k
	replace income98_bins=62500       if inlist(income98, 19, 20)  //50-75k
	replace income98_bins=92500       if inlist(income98, 21, 22)  //75-110k
	replace income98_bins=1.25*110000 if income98==23              //110k+
	replace income98_bins=. if !inrange(year, 1998, 2004)

* income06 -> dollars (surveys from 2006 onward)
	gen income06_bins=.
	replace income06_bins=0.75*10000  if inrange(income06, 1, 8)   //<10k
	replace income06_bins=12500       if inlist(income06, 9, 10)   //10-15k
	replace income06_bins=17500       if inlist(income06, 11, 12)  //15-20k
	replace income06_bins=22500       if inlist(income06, 13, 14)  //20-25k
	replace income06_bins=30000       if inlist(income06, 15, 16)  //25-35k
	replace income06_bins=42500       if inlist(income06, 17, 18)  //35-50k
	replace income06_bins=62500       if inlist(income06, 19, 20)  //50-75k
	replace income06_bins=92500       if inlist(income06, 21, 22)  //75-110k
	replace income06_bins=130000      if inlist(income06, 23, 24)  //110-150k
	replace income06_bins=1.25*150000 if income06==25              //150k+
	replace income06_bins=. if year<2006


/* Stack the four era-specific dollar variables into a single family
   income variable, taking each one only in its own survey years */
	gen faminc=.
	replace faminc=income86_bins if year<1991
	replace faminc=income91_bins if inrange(year, 1991, 1996)
	replace faminc=income98_bins if inrange(year, 1998, 2004)
	replace faminc=income06_bins if inrange(year, 2006, 2014)
	label var faminc "Family income using bins"

/*
	Note: The "_son" suffix on the two indicators below exists only so that the
	      variable names line up with other datasets. Every respondent (male
	      and female) receives a value.

	bottomcoded_son / topcoded_son equal 1 when the respondent's binned income
	sits in the lowest / highest bin of the income variable used in that
	survey era, 0 when it sits in any other bin, and missing when the binned
	income is missing.
*/
	* Survey-year window that belongs to each income variable
	local era86 "year<1991"
	local era91 "(year>=1991 & year<=1996)"
	local era98 "(year>=1998 & year<=2004)"
	local era06 "(year>=2006 & year<=2014)"

	* Dollar value assigned to the lowest bin of each income variable
	local bottom86 = 0.75*4000
	local bottom91 = 0.75*5000
	local bottom98 = 0.75*6000
	local bottom06 = 0.75*10000

	* Dollar value assigned to the open-ended highest bin of each income variable
	local top86 = 1.25*60000
	local top91 = 1.25*75000
	local top98 = 1.25*110000
	local top06 = 1.25*150000

	foreach side in bottom top {

		gen `side'coded_son =.

		foreach w in 86 91 98 06 {
			* Flag the extreme bin within the era, then print diagnostics
			replace `side'coded_son= (income`w'_bins==``side'`w'') if (`era`w'' & income`w'_bins<.)
			tab `side'coded_son if `era`w'', m
			tab income`w'_bins,m 
			tab `side'coded_son,m 
		}
	}

* Express faminc in 1950 dollars with the CPI: https://data.bls.gov/timeseries/CUUR0000SA0 
	* Income is reported for the year before the survey, so the CPI is matched on year-1
	gen year_CPI = year-1
	merge m:1 year_CPI using "../CPI/CPI_deflator.dta"
		* Only observations found in both the GSS and the CPI file survive
		drop if _merge!=3
		drop _merge

	gen fam_inc_real = faminc * deflator
	label var fam_inc_real "Family income (bins), in 1950 dollars"

	gen lnfaminc = ln(fam_inc_real)
	label var lnfaminc "Logged family income (bins)"

	
*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*
	
*******************
***** RESTRICTING SAMPLE
*******************

* Keep the identifiers, age, and every variable positioned from occR_80 through lnfaminc
	keep year id occR_80-lnfaminc age
* Discard the CPI helpers and the era-specific income bins
	drop CPI* year_CPI income*_bins

* Analysis sample: respondents aged 30 to 50
	keep if inrange(age, 30, 50)
	compress
	save "./output/GSS_88to10_analysis.dta", replace
